from gensim.test.utils import common_texts
from gensim.models import FastText

# Train a small FastText model on gensim's bundled toy corpus.
# NOTE: requires gensim >= 4.0 — the old keyword names `size` and `iter`
# were renamed to `vector_size` / `epochs` in gensim 4.0; the old names
# now raise TypeError.
ft_model = FastText(common_texts, vector_size=4, window=3, min_count=1, epochs=10)

# The old `init_sims(replace=True)` call is omitted: gensim 4 normalizes
# vectors on demand, and the method is deprecated/removed there.

# Words most similar to 'computer' + 'human' while dissimilar to 'interface'.
similarities = ft_model.wv.most_similar(positive=['computer', 'human'], negative=['interface'])
most_similar = similarities[0]  # top hit as a (word, cosine_similarity) pair
print(most_similar)

# Pick the word that fits least with the rest of the list.
not_matching = ft_model.wv.doesnt_match("human computer interface tree".split())
print(not_matching)

# Pairwise cosine similarity between individual words.
sim_score = ft_model.wv.similarity('computer', 'human')
print(sim_score)
sim_score = ft_model.wv.similarity('computer', 'interface')
print(sim_score)
from gensim.test.utils import common_texts
from gensim.models import word2vec

# Training hyperparameters (comments translated from Korean).
num_features = 300    # dimensionality of the word vectors
min_word_count = 40   # minimum word frequency (10-100 is reasonable; limits the vocab to meaningful words)
num_workers = 4       # number of parallel worker threads
context = 10          # context window size (how many surrounding words to consider)
downsampling = 1e-3   # downsampling of frequent words (Google's docs recommend 1e-5 to 1e-3)

# Train the model. NOTE: requires gensim >= 4.0 (`vector_size` replaced `size`).
# BUG FIX: the tuning variables above were previously defined but never passed
# to Word2Vec — they are now wired in. `min_count` deliberately stays at 1
# (not min_word_count=40): the toy `common_texts` corpus is far too small,
# and a 40-occurrence threshold would leave an empty vocabulary.
model = word2vec.Word2Vec(
    common_texts,
    vector_size=num_features,
    min_count=1,
    workers=num_workers,
    window=context,
    sample=downsampling,
)

# The deprecated `init_sims(replace=True)` call is omitted: gensim 4
# normalizes vectors lazily when similarity queries need them.

# Words most similar to 'computer' + 'human' while dissimilar to 'interface'.
similarities = model.wv.most_similar(positive=['computer', 'human'], negative=['interface'])
most_similar = similarities[0]  # top hit as a (word, cosine_similarity) pair
print(most_similar)

# Pick the word that fits least with the rest of the list.
not_matching = model.wv.doesnt_match("human computer interface tree".split())
print(not_matching)

# Pairwise cosine similarity between individual words.
sim_score = model.wv.similarity('computer', 'human')
print(sim_score)
sim_score = model.wv.similarity('computer', 'interface')
print(sim_score)
import itertools  # NOTE(review): unused in this section; kept in case other cells rely on it
from gensim.test.utils import common_texts
from glove import Corpus, Glove  # glove_python package

# Build the word co-occurrence matrix from the toy corpus.
corpus = Corpus()
corpus.fit(common_texts, window=10)

# Train GloVe embeddings on the co-occurrence matrix.
glove = Glove(no_components=100, learning_rate=0.05)
glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
glove.add_dictionary(corpus.dictionary)  # attach the word<->id mapping so word lookups work

# BUG FIX: removed a leftover copy-paste line that queried `ft_model` (from the
# FastText section) and was immediately overwritten below — it would raise
# NameError if this section were run on its own.
similarities = glove.most_similar('computer')
most_similar = similarities[0]  # closest neighbour as a (word, similarity) pair
print(most_similar)